# Import the restaurant CSV from the local machine.
data1 <- read.csv("/Users/lin/Desktop/Restaurant.csv")
head(data1)
class(data1$cuisines)
#> [1] "factor"
# Work on a plain character copy of the cuisines column for pattern matching.
a <- as.character(data1$cuisines)
# Identify which restaurants are vegetarian: TRUE where the cuisines text
# contains "Vegetarian".
library("stringr")
vege <- str_detect(a, "Vegetarian")
data2 <- cbind(vege, data1)
# Filter out the restaurants that are not vegetarian.
class(data2$vege)
#> [1] "logical"
# Recode the logical flag as 1/0 so the subset below can test vege == 1.
data2$vege <- as.numeric(data2$vege)
# Subset to the vegetarian restaurants.
library(dplyr)
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#> filter, lag
#> The following objects are masked from ‘package:base’:
#> intersect, setdiff, setequal, union
data3 <- subset(data2, vege == 1)
# Keep only the columns used later and drop rows with any missing value in them.
data4 <- data3 %>%
  select(
    id, city, name, latitude, longitude, phones, paymentTypes, postalCode
  ) %>%
  na.omit()
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(data4)
}
library(sp)
library(maps)
library(maptools)
#> Checking rgeos availability: TRUE
# Map longitude/latitude points to the US county polygon that contains them.
#
# The single argument to this function, pointsDF, is a data.frame in which:
# - column 1 contains the longitude in degrees (negative in the US)
# - column 2 contains the latitude in degrees
#
# Returns a character vector with one element per row of pointsDF holding the
# ID of the containing county polygon (for example "new york,albany"), or NA
# for a point that falls inside no county polygon. An input with zero rows
# returns character(0).
latlong2county <- function(pointsDF) {
  # Nothing to look up: return early instead of building spatial objects
  # from an empty coordinate set.
  if (NROW(pointsDF) == 0L) {
    return(character(0))
  }

  # Prepare SpatialPolygons object with one SpatialPolygon per county.
  counties <- map("county", fill = TRUE, col = "transparent", plot = FALSE)
  # Polygon names may carry a ":" suffix; keep only the part before it so
  # that all pieces of one county share a single ID.
  IDs <- vapply(
    strsplit(counties$names, ":"),
    function(x) x[1],
    character(1)
  )
  counties_sp <- map2SpatialPolygons(
    counties,
    IDs = IDs,
    proj4string = CRS("+proj=longlat +datum=WGS84")
  )

  # Convert pointsDF to a SpatialPoints object in the same coordinate system.
  pointsSP <- SpatialPoints(
    pointsDF,
    proj4string = CRS("+proj=longlat +datum=WGS84")
  )

  # Use 'over' to get _indices_ of the Polygons object containing each point.
  indices <- over(pointsSP, counties_sp)

  # Return the county names of the Polygons object containing each point.
  countyNames <- vapply(
    counties_sp@polygons,
    function(x) x@ID,
    character(1)
  )
  countyNames[indices]
}
# Look up the county of every vegetarian restaurant from its coordinates.
# latlong2county() expects longitude in column 1 and latitude in column 2.
testPoints <- data.frame(x = data4$longitude, y = data4$latitude)
county_list <- latlong2county(testPoints)
county_list_data <- as.data.frame(county_list)
# Merge into data4 by adding a column named county_list.
data5 <- cbind(data4, county_list_data)
# Drop the rows that still contain a missing value after the merge.
data5 <- data5 %>%
  na.omit()
unique(data5$county_list)
#> [1] new york,chautauqua new york,albany new york,saratoga
#> [4] new york,rensselaer new york,washington new york,ulster
#> [7] new york,warren new york,westchester new york,kings
#> [10] new york,queens new york,new york new york,cayuga
#> [13] new york,bronx new york,dutchess new york,nassau
#> [16] new york,ontario new york,erie new york,suffolk
#> [19] new york,onondaga new york,columbia new york,franklin
#> [22] new york,monroe new york,orange new york,oneida
#> [25] new york,essex new york,otsego new york,greene
#> [28] new york,putnam new york,niagara new york,delaware
#> [31] new york,rockland new york,clinton new york,montgomery
#> [34] new york,broome new york,tompkins new york,seneca
#> [37] new york,sullivan new york,madison new york,st lawrence
#> [40] new york,steuben new york,chemung new york,herkimer
#> [43] new york,chenango new york,schenectady new york,hamilton
#> [46] new york,livingston new york,jefferson new york,tioga
#> [49] new york,cortland new york,yates new york,fulton
#> [52] new york,wayne new york,schuyler new york,orleans
#> [55] new york,cattaraugus new york,genesee new york,lewis
#> [58] new york,oswego
#> 58 Levels: new york,albany new york,bronx ... new york,yates
# Clean the county labels of the NY vegetarian restaurant data with a
# regular expression.
county_name <- as.character(data5$county_list)
# Remove everything up to and including the ",", leaving the bare county name.
county_name2 <- gsub(".*,", "", county_name)
# Add the bare name as the first column and drop the original combined label.
data6 <- cbind(county_name2, data5)
data6$county_list <- NULL
data6
unique(data6$county_name2)
#> [1] chautauqua albany saratoga rensselaer washington ulster
#> [7] warren westchester kings queens new york cayuga
#> [13] bronx dutchess nassau ontario erie suffolk
#> [19] onondaga columbia franklin monroe orange oneida
#> [25] essex otsego greene putnam niagara delaware
#> [31] rockland clinton montgomery broome tompkins seneca
#> [37] sullivan madison st lawrence steuben chemung herkimer
#> [43] chenango schenectady hamilton livingston jefferson tioga
#> [49] cortland yates fulton wayne schuyler orleans
#> [55] cattaraugus genesee lewis oswego
#> 58 Levels: albany bronx broome cattaraugus cayuga chautauqua ... yates
# Import the 2016 county-level presidential election results.
eleccounty <- read.csv(
  "/Users/lin/Desktop/2016_US_County_Level_Presidential_Results.csv"
)
eleccounty$X1 <- NULL
eleccounty <- eleccounty %>%
  filter(state_abbr == "NY") # there are 62 counties in NY State
# Normalise the election county names so they match the restaurant data.
foo <- as.character(eleccounty$county_name)
# Strip the last word and the whitespace before it
# (presumably the " County" suffix; verify against the CSV).
foo2 <- gsub("\\s*\\w*$", "", foo)
foo3 <- tolower(foo2)
# fixed() matches the "." literally; as a plain regex it would match any
# character in that position.
foo4 <- str_replace(foo3, fixed("st. lawrence"), "st lawrence")
foo4
#> [1] "albany" "allegany" "bronx" "broome" "cattaraugus"
#> [6] "cayuga" "chautauqua" "chemung" "chenango" "clinton"
#> [11] "columbia" "cortland" "delaware" "dutchess" "erie"
#> [16] "essex" "franklin" "fulton" "genesee" "greene"
#> [21] "hamilton" "herkimer" "jefferson" "kings" "lewis"
#> [26] "livingston" "madison" "monroe" "montgomery" "nassau"
#> [31] "new york" "niagara" "oneida" "onondaga" "ontario"
#> [36] "orange" "orleans" "oswego" "otsego" "putnam"
#> [41] "queens" "rensselaer" "richmond" "rockland" "saratoga"
#> [46] "schenectady" "schoharie" "schuyler" "seneca" "st lawrence"
#> [51] "steuben" "suffolk" "sullivan" "tioga" "tompkins"
#> [56] "ulster" "warren" "washington" "wayne" "westchester"
#> [61] "wyoming" "yates"
# Final step for the election data: attach the cleaned names under the same
# column name the restaurant data uses, so the two tables can be joined on it.
new_eleccounty <- cbind(eleccounty, foo4)
names(new_eleccounty)[names(new_eleccounty) == "foo4"] <- "county_name2"
# Work out which party won each county in 2016.
# A positive difference means more GOP votes; a tie is labelled "Democrat".
new_eleccounty <- new_eleccounty %>%
  mutate(difference_in_vote = votes_gop - votes_dem) %>%
  mutate(R.D = ifelse(difference_in_vote > 0, "Republican", "Democrat"))
# Combine the cleaned NY vegetarian restaurant data with the 2016 county
# election data.
# Left outer join: every restaurant row in data6 is kept.
# The join keys are converted to character first. Joining them as factors made
# dplyr warn: "Column `county_name2` joining factors with different levels,
# coercing to character vector".
df <- left_join(
  mutate(data6, county_name2 = as.character(county_name2)),
  mutate(new_eleccounty, county_name2 = as.character(county_name2)),
  by = "county_name2"
)
# Replace blank payment types with the literal text "NA", then restore the
# column to a factor.
df$paymentTypes <- as.character(df$paymentTypes)
df$paymentTypes[df$paymentTypes == ""] <- "NA"
df$paymentTypes <- as.factor(df$paymentTypes)
# Make a leaflet map of vegetarian restaurants in New York State.
library(leaflet)
library(RColorBrewer)
# Two-colour palette keyed on the winning party of each restaurant's county.
pal <- colorFactor(palette = c("blue", "red"), domain = df$R.D)
color_vote <- pal(df$R.D)
# Popup HTML for the restaurant layer (one string per row of data6).
content2 <- paste(
  "County:", data6$county_name2, "<br/>",
  "Restaurant Name:", data6$name, "<br/>",
  "Phones:", data6$phones, "<br/>",
  "PaymentType:", data6$paymentTypes, "<br/>",
  "PostCode:", data6$postalCode, "<br/>"
)
# Popup HTML for the election layer (one string per row of df).
content3 <- paste(
  "County:", df$county_name, "<br/>",
  "Restaurant Name:", df$name, "<br/>",
  "Phones:", df$phones, "<br/>",
  "PaymentType:", df$paymentTypes, "<br/>",
  "PostCode:", df$postalCode, "<br/>"
)
m2 <- leaflet(data = data6) %>%
  # Base groups
  # NOTE(review): confirm the Stamen providers are still available in the
  # installed leaflet version.
  addTiles(group = "OSM (default)") %>%
  addProviderTiles(providers$Stamen.Toner, group = "Toner") %>%
  addProviderTiles(providers$Stamen.TonerLite, group = "Toner Lite") %>%
  # Overlay groups
  addCircles(
    color = "green",
    lng = ~longitude,
    lat = ~latitude,
    popup = content2,
    group = "Vegetarian Restaurants"
  ) %>%
  addCircleMarkers(
    data = df,
    lng = ~longitude,
    lat = ~latitude,
    popup = content3,
    group = "Political Ideology",
    color = color_vote
  ) %>%
  # Pass the values as a plain vector: the map's default data is data6, which
  # has no R.D column, so a formula would only resolve df by accident.
  addLegend(pal = pal, values = df$R.D, title = "Won Party in 2016") %>%
  # Layers control
  addLayersControl(
    baseGroups = c("OSM (default)", "Toner", "Toner Lite"),
    overlayGroups = c("Vegetarian Restaurants", "Political Ideology"),
    options = layersControlOptions(collapsed = FALSE)
  )
m2
NA